Helgi Ingimundarson - hi2179, Bradlee Spiece - bcs2149, Giovanni Gambarotta - gg2607, Jaydeep Soni - js4957
import sys
sys.path.append('../utils/')
import pandas.io.data as web
import seaborn as sns
from sqlalchemy import create_engine
import datetime
import pandas as pd
from pygments import highlight
from pygments.lexers.sql import SqlLexer
from pygments.formatters import HtmlFormatter, LatexFormatter
from IPython import display
import functools as ft
import matplotlib.pyplot as plt
import scipy as sp
import scipy.interpolate
from __future__ import division
import numpy as np
import matplotlib.dates as mdates
from scipy.optimize import minimize
from matplotlib.finance import candlestick2_ohlc
from datetime import date
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from HTMLParser import HTMLParser
import requests
import re
from dateutil import parser
from progressbar import ProgressBar
from trading_days import TradingDay
from itertools import chain
from scipy.optimize import curve_fit
from datetime import date, timedelta
# Connection string for the IVY/XFDATA SQL Server instance (pymssql driver)
CONNECTION_STRING = 'mssql+pymssql://IVYuser:resuyvi@vita.ieor.columbia.edu'

def get_connection():
    """Open and return a fresh connection to the IVY database."""
    return create_engine(CONNECTION_STRING).connect()
# Query database and return results in dataframe
def query_dataframe(query, connection=None):
    """Execute *query* and return the result set as a pandas DataFrame.

    Opens a new database connection when none is supplied.
    """
    conn = get_connection() if connection is None else connection
    return pd.read_sql(query, conn)
# Query database using external file and return results in dataframe
def query_dataframe_f(filename, connection=None):
    """Read a SQL query from *filename*, execute it, and return a DataFrame.

    Opens a new database connection when none is supplied.
    """
    with open(filename, 'r') as handle:
        sql = handle.read()
    conn = get_connection() if connection is None else connection
    return pd.read_sql(sql, conn)
# Print sql query and query results
def print_and_query(filename, connection=None, use_latex=False):
    """Display the syntax-highlighted SQL from *filename* together with the
    DataFrame produced by running it.

    use_latex selects LaTeX-formatted highlighting; otherwise HTML with
    inline CSS is emitted for the notebook.
    """
    if connection is None:
        connection = get_connection()
    with open(filename, 'r') as handle:
        sql = handle.read()
    if use_latex:
        rendered = display.Latex(highlight(sql, SqlLexer(), LatexFormatter()))
    else:
        formatter = HtmlFormatter()
        css = formatter.get_style_defs('.highlight')
        markup = highlight(sql, SqlLexer(), formatter)
        rendered = display.HTML(
            '<style type="text/css">{}</style>{}'.format(css, markup))
    display.display(rendered, pd.read_sql(sql, connection))
def data_frame_to_sql(df, table_name):
    """Write *df* into the database as *table_name*, replacing any existing table."""
    engine = create_engine(CONNECTION_STRING)
    df.to_sql(table_name, engine, if_exists='replace', index=False)
def trading_window(date, pre, post):
    '''
    Select the interval of trading days around *date* spanning offsets
    `pre` through `post` inclusive (pre is typically negative).
    '''
    offsets = range(pre, post + 1)
    return pd.Series([date + TradingDay(k) for k in offsets])
def bus_days_between(start, end):
    """Signed count of trading days from *start* to *end* (0 for same date)."""
    if start.date() == end.date():
        return 0
    # Count over the earlier->later span, then restore the direction sign.
    lo, hi, sign = (start, end, 1) if start < end else (end, start, -1)
    span = pd.date_range(start=lo.date(), end=hi.date(),
                         closed='left', freq=TradingDay(1))
    return sign * len(span)
'''
American Option Pricer with dividends - Function call
ABM(ft.partial(VP,K=Strike,CallPut='OptType'),StockPrice,TimeToMaturity(1/360),
Interest Rate, Volatility, TreeSteps, Dividends))
Dividends are given in the format np.array([[time, dividend],....,])
'''
def BPTree(n, S, u, d, ex_div):
    """Build an n-step recombining binomial price tree rooted at S.

    u/d are the up/down multipliers. ex_div is an array of
    [step_index, amount] rows; at each ex-dividend step the whole price
    level is shifted down by the dividend amount. Returns the levels
    root-first as a list of numpy arrays.
    """
    levels = [np.array([S])]
    for step in range(n):
        prev = levels[-1]
        nxt = np.concatenate((prev[:1] * u, prev * d))
        if step in ex_div[:, 0]:
            # Subtract the dividend going ex at this step from every node
            nxt = nxt - ex_div[ex_div[:, 0] == step][0, 1]
        levels.append(nxt)
    return levels
def GBM(R, P, S, T, r, v, n, ex_div):
    '''
    Price an option on an n-step binomial (CRR-style) tree with discrete dividends.

    R      - backward-induction step (e.g. `American`), partially applied below
    P      - payoff function applied to every tree level
    S      - spot price, T - time to maturity (years), r - rate, v - volatility
    n      - number of tree steps
    ex_div - array of [time_in_years, amount] dividend rows
    Returns the root-node option value.
    '''
    t = float(T)/n
    T_range = np.arange(0,T+t,t)
    # Selecting dividends before option's maturity date
    # (boolean masking returns a copy, so the caller's ex_div is not mutated)
    curr_div = ex_div[ex_div[:,0]<=T]
    div_idx = []
    # Indexing the dividends ex date in the binomial tree
    for i in range (len(curr_div[:,0])):
        T_diff = T_range[1:]-curr_div[i,0]
        # Large sentinel: argmin then picks the first step at/after the ex date
        T_diff[T_diff<0]=100000
        div_idx.append(np.argmin(T_diff))
    # Replace dividend times with their corresponding tree-step indices
    curr_div[:,0] = np.array(div_idx)
    # Defining up/down binomial step coefficients
    u = np.exp(v * np.sqrt(t))
    d = 1./u
    # Risk-neutral up-move probability
    p = (np.exp(r * t) - d)/(u - d)
    # Creating the binomial tree (reversed so the leaves come first)
    ptree = BPTree(n, S, u, d, curr_div)[::-1]
    # Defining a function for discounting and P-measure in the tree
    R_ = ft.partial(R, np.exp(-r*t), p)
    # Discounting through the tree with american exercise option
    return ft.reduce(R_, map(P, ptree))[0]
def American(D, p, a, b):
    """One backward-induction step with early exercise.

    D - per-step discount factor, p - risk-neutral up probability,
    a - option values at the next (longer) level, b - intrinsic values at
    the current level. Returns max(intrinsic, discounted continuation).
    """
    continuation = D * (p * a[:-1] + (1 - p) * a[1:])
    return np.maximum(b, continuation)
def VP(S, K, CallPut):
    """Intrinsic option value: max(S-K, 0) for a call ('C'), max(K-S, 0) otherwise."""
    return np.maximum(S - K, 0) if CallPut == 'C' else np.maximum(K - S, 0)
ABM = ft.partial(GBM, American)
def synthetic_ATM(ticker,earning_date,expiration,opt_type,method,day_range,steps):
    '''
    SYNTHETIC OPTIONS
    Function returns a table with the following features for synthetic options
    in the day_range around the earning date for a selected expiration of a
    selected ticker between Date_Start and Date_End:
    - Date.
    - Stock Price.
    - CallPut option type.
    - Expiration.
    - Strike.
    - Interpolated MBBO.
    - Implied volatility obtained by inverting the pricing equation.
    - method selects the ATM definition: 'D' for DeltaStraddle=0,
      'S' for Strike=StockPrice.
    steps is the number of binomial-tree steps passed to the ABM pricer.
    '''
    # Obtaining the date range around the earning date
    earning_date = datetime.datetime.date(earning_date)
    date_range = trading_window(earning_date,
        day_range[0],day_range[1]).astype(datetime.date)
    # Get option ATM serieses; drop dates on or after expiration
    date_range = date_range[date_range<expiration]
    sql_raw = open('ATM.sql', 'r').read()
    sql_format = sql_raw.format(
        ticker = ticker,
        date_start = str(date_range.values[0])[:10], # yyyy-MM-dd
        date_end = str(date_range.values[-1])[:10], # yyyy-MM-dd
        date_expiration = str(expiration)[:10],
    )
    data = query_dataframe(sql_format)
    # Synthetic options method selection
    data = data[data.ATMethod==method]
    # Obtaining Straddle Delta=0 synthetic strikes
    delta_strikes = {}
    if method=='D':
        for date, df in data.groupby(['Date']):
            delta_straddle = {}
            for K,df_K in df.groupby('Strike'):
                # Sum of the two deltas (call + put) at the same strike
                delta_straddle[K] = df_K.Delta.values[0]+df_K.Delta.values[1]
            delta_straddle = pd.Series(delta_straddle,index=delta_straddle.keys())
            # Interpolate strike as a function of straddle delta; take delta=0
            spline = sp.interpolate.interp1d(delta_straddle.values,delta_straddle.index)
            delta_strikes[date]=float(spline(0))
    # Obtaining the dividends dates and dollar amount
    query = ''' SELECT ExDate, Amount
    FROM XFDATA.dbo.DISTRIBUTION dist
    INNER JOIN XFDATA.dbo.SECURITY sec ON dist.SecurityID=sec.SecurityID
    WHERE Ticker='%s' AND (DistributionType='%s' OR DistributionType='1')
    AND ExDate BETWEEN '%s' AND '%s' ''' % (ticker,
        '%',str(date_range.values[0])[:10],expiration)
    dividends = query_dataframe(query)
    # Creating an empty dataframe for output data
    data_out = pd.DataFrame(columns=['Date','StockPrice','CallPut','Expiration',
        'TimeToMaturity','Strike','MBBO','IV','ATM Method'])
    # Construct a synthetic option on each date in the considered range
    for date, df in data.groupby(['Date','CallPut']):
        # Variables
        S = df.ClosePrice.values[0]
        # Selecting synthetic strike price based on the method
        if method=='S':
            X = df.ClosePrice.values[0]
        else:
            X = delta_strikes[date[0]]
        CP = df.CallPut.values[0]
        # Time to maturity in years of 252 trading days
        T = (bus_days_between(date[0],expiration))*1./252
        r = df.ZeroRate.values[0]*0.01
        # Setting an initial value for the IV in the optimization (avoiding -99)
        if (df.ImpliedVolatility[df.ImpliedVolatility>0].mean()>0):
            # Set the initial value of the IV to the mean of the 4 options considered
            IV_0=df.ImpliedVolatility[df.ImpliedVolatility>0].mean()
        else:
            # Set the initial value to the mean IV of the previous day
            IV_0=data.ImpliedVolatility[data.Date==(date[0]-datetime.timedelta(1))].mean()
        # Time to ex dividend date
        time_to_ExDate = np.array([bus_days_between(date[0],t)*1./252 for t in dividends.ExDate])
        # Dividend table with maturity of Ex Div dates
        div_to_expiration = np.array([time_to_ExDate,dividends.Amount]).T
        # Linear option price interpolation of the closest option data
        if X in df.Strike.values:
            # Check if option exists with desired characteristics
            MBBO_synthetic = float(df.MBBO.values[0])
        else:
            spline = sp.interpolate.interp1d(df.Strike.values,df.MBBO.values)
            MBBO_synthetic = float(spline(X))
        # Defining the objective function for optimization:
        # squared pricing error of the binomial pricer vs the market MBBO
        def f(x):
            return (ABM(ft.partial(VP,K=X,CallPut=CP),S, T, r, x, steps,
                div_to_expiration[div_to_expiration[:,0]>=0])-MBBO_synthetic)**2
        # Defining minimization constraints (volatility stays non-negative)
        cons = ({'type': 'ineq',
            'fun' : lambda x: np.array(x),
            'jac': lambda x: np.array([1.0])})
        # Optimizing
        res = minimize(f,IV_0,constraints=cons)
        if method=='S': exp_method='ATM Strike'
        else: exp_method='ATM Delta'
        # Append data
        s = pd.Series([date[0],S,CP,expiration,T,X,MBBO_synthetic,float(res.x),exp_method],
            index=['Date','StockPrice','CallPut',
            'Expiration','TimeToMaturity','Strike','MBBO','IV','ATM Method'])
        data_out = data_out.append(s,ignore_index=True)
    return data_out[data_out.CallPut==opt_type]
Pick three optionable stocks.
a) Using the Internet, make a table of announced earnings dates for the two-year period 6/1/2011-6/1/2013.
# Two-year analysis window for the announced earnings dates
start_date = '2011-06-01'
end_date = '2013-06-01'
class EarningsParser(HTMLParser):
    '''
    HTML parser that collects earnings dates (m/d/y strings) found inside
    the tables of a streetinsider.com earnings page, plus the day offset
    implied by before-open/after-close reporting.
    '''
    # Class-level defaults; `dates` is rebound per instance in __init__.
    store_dates = False
    earnings_offset = None
    dates = []

    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self)
        self.dates = []

    def handle_starttag(self, tag, attrs):
        # Only text inside <table> elements is scanned for dates
        if tag == 'table':
            self.store_dates = True

    def handle_endtag(self, tag):
        if tag == 'table':
            self.store_dates = False

    def handle_data(self, data):
        if not self.store_dates:
            return
        match = re.match(r'\d+/\d+/\d+', data)
        if match:
            self.dates.append(match.group(0))
        # If a company reports before the bell, record the earnings date
        # being at midnight the day before. Ex: WMT reports 5/19/2016,
        # but we want the reference point to be the closing price on 5/18/2016
        if 'After Close' in data:
            self.earnings_offset = timedelta(days=0)
        elif 'Before Open' in data:
            self.earnings_offset = timedelta(days=-1)
def earnings_releases(ticker,start_date,end_date):
    '''
    Function returns earning dates for the selected date range and ticker,
    scraped from streetinsider.com and returned as a one-column DataFrame
    ('EarningDate', datetime64).
    '''
    # Browser-like user agent so the site serves the normal page
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) '\
        'Gecko/20100101 Firefox/46.0'
    headers = {'user-agent': user_agent}
    base_url = 'http://www.streetinsider.com/ec_earnings.php?q={}'\
        .format(ticker)
    e = EarningsParser()
    s = requests.Session()
    a = requests.adapters.HTTPAdapter(max_retries=0)
    s.mount('http://', a)
    # Feed the raw page into the HTML parser to collect dates and the offset
    e.feed(str(s.get(base_url, headers=headers).content))
    if e.earnings_offset is not None:
        # Shift dates by the before-open/after-close offset; keep past dates only
        dates = map(lambda x: parser.parse(x) + e.earnings_offset, e.dates)
        past = filter(lambda x: x < datetime.datetime.now(), dates)
        res = pd.DataFrame(list(map(lambda d: d.isoformat(), past)),
            columns=['EarningDate']).astype(np.datetime64)
        res = res[(res<np.datetime64(end_date))\
            & (res>np.datetime64(start_date))]
        res.dropna(inplace=True)
        # Checking for the right number of earning dates (4/year)
        earn_expected_n = int((parser.parse(end_date)-parser.parse(start_date)).days/365*4)
        if len(res)<earn_expected_n: print('Check for possible missing earning dates')
    # NOTE(review): if earnings_offset was never set (page failed to parse),
    # `res` is unbound here and this raises NameError -- confirm intended.
    return res
# Pull announced earnings dates from the web for each of the three stocks
earnings_releases('AAPL',start_date,end_date)
earnings_releases('CELG',start_date,end_date)
earnings_releases('GOOG',start_date,end_date)
b) For each stock, identify the option series that will be: (A) the front month at earnings, (B) the next available series, (C) the first January leap (this will be no sooner than the fifth available option month).
def earnings_expirations(ticker,start_date, end_date):
    '''
    The function obtains the earning date, front month, second month and
    January leap expiration for the selected ticker and date range.
    Expirations recorded as Saturdays on IVY are converted to the previous
    trading day. (The ATM query within synthetic_ATM() works with these
    adjusted dates for option expirations.)
    '''
    df_out = pd.DataFrame(columns=['EarningDate','First','Second','JanLeap'])
    # Obtaining earning dates from the web
    df_out['EarningDate'] = earnings_releases(ticker,start_date,end_date).EarningDate
    # Initializing the dataframe (placeholder timestamps, overwritten below)
    df_out[['First','Second','JanLeap']] = datetime.datetime.now()
    # Creating a SQL table Earnings for the expiration query to join against
    data_frame_to_sql(df_out,'Earnings')
    sql_raw = open('Exp.sql', 'r').read()
    sql_format = sql_raw.format(
        ticker = ticker,
        date_start = start_date,
        date_end = end_date)
    res = query_dataframe(sql_format)
    # Saturday-recorded expirations (dayofweek==5) shift to the prior trading day
    df_out['First'] = res.First.where(res.First.dt.dayofweek!=5,
        res.First[res.First.dt.dayofweek==5]-TradingDay(1)).values
    df_out['Second'] = res.Second.where(res.Second.dt.dayofweek!=5,
        res.Second[res.Second.dt.dayofweek==5]-TradingDay(1)).values
    df_out['JanLeap'] = res.JanLeap.where(res.JanLeap.dt.dayofweek!=5,
        res.JanLeap[res.JanLeap.dt.dayofweek==5]-TradingDay(1)).values
    return df_out.reset_index(drop=True)
# Expiration tables (front month, second month, January leap) per earnings date
earnings_expirations('AAPL',start_date,end_date)
earnings_expirations('CELG',start_date,end_date)
earnings_expirations('GOOG',start_date,end_date)
We wish to follow the implied volatilities of the ATM straddles of various series beginning approximately three weeks before earnings and proceeding to one week after earnings.
NOTE: for this problem, we do not use the volatility surface tables in IVY to track the 50- delta vols. Why, you ask? The volatility surface table uses time averaging and therefore minimizes drops across earnings events.
a)
NOTE: The front month series may truncate with expiry but the other two will continue past expiration.
# Restrict the study window to one year of earnings events
start_date = '2011-06-01'
end_date = '2012-06-01'
# Selecting a Stock
Ticker = 'AAPL'
# Earnings and expirations table
earnings_expirations(Ticker,start_date,end_date)
def earnings_option_series(ticker,start_date,end_date,opt_type):
    '''
    For every earnings event in the range, build synthetic ATM option series
    for the front month, second month and January-leap expirations, plot the
    implied-volatility paths against stock price / front-month MBBO, and fit
    quadratic and exponential curves to the pre-earnings IV profile.

    opt_type is 'C' or 'P'. Returns a dict keyed by earnings date holding the
    three synthetic-option DataFrames produced by synthetic_ATM.
    '''
    res = {}
    # Earnings and expirations table
    df_earnings_exp = earnings_expirations(ticker,start_date,end_date)
    for t in range(df_earnings_exp.shape[0]):
        first_exp = df_earnings_exp.First.loc[t]
        second_exp = df_earnings_exp.Second.loc[t]
        janleap_exp = df_earnings_exp.JanLeap.loc[t]
        earning_date = df_earnings_exp.EarningDate.loc[t]
        # Synthetic ATM series over [-15,+5] trading days around earnings;
        # the January leap uses the zero-delta-straddle ('D') method
        first = synthetic_ATM(ticker,earning_date,first_exp,opt_type,'S',[-15,5],100)
        second = synthetic_ATM(ticker,earning_date,second_exp,opt_type,'S',[-15,5],100)
        janleap = synthetic_ATM(ticker,earning_date,janleap_exp,opt_type,'D',[-15,5],100)
        if opt_type=='C': callput='Call'
        else: callput='Put'
        # --- Plot 1: IVs of the three series vs the stock price ---
        fig, ax1 = plt.subplots(figsize=(15,6))
        ax2 = ax1.twinx()
        # Plotting implied volatilities
        ax1.plot(first.Date.values, first.IV,'-bo',
            label=(callput+' IV First ' +str(first_exp)[:10]))
        ax1.plot(second.Date.values, second.IV,'-go',
            label=callput+' IV Second ' +str(second_exp)[:10])
        ax1.plot(janleap.Date.values, janleap.IV,'-ro',
            label=callput+' IV JanLeap '+str(janleap_exp)[:10])
        # Plotting the stock price
        ax2.plot(janleap.Date.values, janleap.StockPrice, '--k',
            label=ticker + ' stock price')
        ax1.set_ylabel('Implied Volatility')
        ax2.set_ylabel('Stock Price')
        ax1.legend(loc=2)
        ax2.legend(loc=0)
        plt.title(ticker +' Earning Date '+str(earning_date)[:10])
        plt.show()
        # --- Plot 2: same IVs vs the front-month synthetic MBBO ---
        fig, ax1 = plt.subplots(figsize=(15,6))
        ax2 = ax1.twinx()
        # Plotting implied volatilities
        ax1.plot(first.Date.values, first.IV,'-bo',
            label=(callput+' IV First ' +str(first_exp)[:10]))
        ax1.plot(second.Date.values, second.IV,'-go',
            label=callput+' IV Second ' +str(second_exp)[:10])
        ax1.plot(janleap.Date.values, janleap.IV,'-ro',
            label=callput+' IV JanLeap '+str(janleap_exp)[:10])
        # Plotting the front-month synthetic option price
        ax2.plot(first.Date.values, first.MBBO.values,
            '--k', label=ticker + ' ATM front month synthetic MBBO')
        ax1.set_ylabel('Implied Volatility')
        ax2.set_ylabel('MBBO')
        ax1.legend(loc=2)
        ax2.legend(loc=0)
        plt.title(ticker +' Earning Date '+str(earning_date)[:10])
        plt.show()
        # Saving data
        res[earning_date]={'First':first, 'Second':second,'JanLeap':janleap}
        # Restricting data to dates before the earning date for the fits
        first = first[first.Date<earning_date]
        second = second[second.Date<earning_date]
        janleap = janleap[janleap.Date<earning_date]
        idx_first = np.array(range(first.shape[0]))
        idx_second = np.array(range(second.shape[0]))
        idx_janleap = np.array(range(janleap.shape[0]))
        # Quadratic fit
        fig = plt.figure(figsize=(15,6))
        fit_first = np.poly1d(np.polyfit(idx_first,first.IV.values,deg=2))
        fit_second = np.poly1d(np.polyfit(idx_second,second.IV.values,deg=2))
        fit_janleap = np.poly1d(np.polyfit(idx_janleap,janleap.IV.values,deg=2))
        plt.plot(idx_first, first.IV.values,'-bo',
            label=(callput+' IV First ' +str(first_exp)[:10]))
        plt.plot(idx_first, fit_first(idx_first),'--b')
        plt.plot(idx_second, second.IV.values,'-go',
            label=(callput+' IV Second ' +str(second_exp)[:10]))
        plt.plot(idx_second, fit_second(idx_second),'--g')
        plt.plot(idx_janleap, janleap.IV.values,'-ro',
            label=(callput+' IV JanLeap ' +str(janleap_exp)[:10]))
        plt.plot(idx_janleap, fit_janleap(idx_janleap),'--r')
        plt.grid(True)
        plt.legend(loc=0)
        plt.ylabel('Implied Volatility')
        plt.title(ticker + ' Earning Date '+str(earning_date)[:10]+' quadratic fitting')
        plt.show()
        # Exponential fit
        def func(x, a, b, c):
            # Exponential-decay model fitted to the pre-earnings IV path
            return a * np.exp(-b * x) + c
        popt_first, pcov_first = curve_fit(func,idx_first,first.IV.values,maxfev=10000)
        popt_second, pcov_second = curve_fit(func,idx_second,second.IV.values, maxfev=10000)
        popt_janleap, pcov_janleap = curve_fit(func,idx_janleap,janleap.IV.values, maxfev=10000)
        fig = plt.figure(figsize=(15,6))
        plt.plot(idx_first, first.IV.values,'-bo',
            label=(callput+' IV First ' +str(first_exp)[:10]))
        plt.plot(idx_first, func(idx_first,*popt_first),'--b')
        plt.plot(idx_second, second.IV.values,'-go',
            label=(callput+' IV Second ' +str(second_exp)[:10]))
        plt.plot(idx_second, func(idx_second,*popt_second),'--g')
        plt.plot(idx_janleap, janleap.IV.values,'-ro',
            label=(callput+' IV JanLeap ' +str(janleap_exp)[:10]))
        plt.plot(idx_janleap, func(idx_janleap,*popt_janleap),'--r')
        plt.grid(True)
        plt.legend(loc=0)
        plt.ylabel('Implied Volatility')
        plt.title(ticker+' Earning Date '+str(earning_date)[:10]+' exponential fitting')
        plt.show()
    return res
# Run the full earnings IV study for AAPL calls, then puts
res = earnings_option_series('AAPL','2011-06-01','2012-06-01','C')
res = earnings_option_series('AAPL','2011-06-01','2012-06-01','P')
b) Do volatilities drop discontinuously across the earnings dates?
As can be seen from the above plots, ATM option implied volatilities drop discontinuously for the front contract and in some cases for the second contract. The impact of earnings is reflected in the implied volatility of the front month, as we expected.
c) Try to fit the volatility profiles running up to earnings with parabolas. Can you see any regularity? Would exponentials do better?
Looking at the above fits, the parabola seems to fit the implied volatilities of the front month better in the run-up to the earnings date. Exponentials do not seem to do any better.
d) What difficulty arises with following individual strikes that we avoid by following synthetic strikes?
Using synthetic strikes to follow the behaviour of implied volatilities allows us to avoid the difficulties we may encounter in moving from one strike to another when the underlying moves. With the ATM strike method we interpolate two different strike prices to get the synthetic option implied volatility, which is probably more reliable than looking at a single strike. The same considerations apply to the ATM delta method, which uses a synthetic delta=0 straddle.
Recall in the first lecture we discussed the possible impact of a large trade. Having a minute database allows us to examine the consequences of such a trade on a finer scale. Pick a stock from the LiveVol database and find the largest options trade to occur at least one week removed from an Earnings date and at least two times larger than any trade for the subsequent 3 days. We wish to examine minute by minute over the 3-day period.
a) Generate an implied volatility surface for your choice just prior to the large trade. If space is an issue concentrate on a surface centered about the strike and series of the large trade
b) Follow the vol surface minute by minute after the large trade
c) Can you characterize a relaxation time scale for the vol surface
d) Propose a possible trading scheme for a high frequency trader to monetize a future disturbance of this kind
In the LiveVol database we have trade tick data and minute-by-minute calculations for Apple, Google and Tesla stock options in October 2013. We looked at all three stocks and analyzed the largest option trades at least one week removed from an earnings date.
In class we saw an interesting example where the whole volatility surface of the stock FDC shifted significantly downwards due to a large option trade. We were hoping that we would be able to find a similar impact from a trade in the tick database but unfortunately we were not able to. That might be due to the fact that all of the stocks we have data for are very liquid so that we would need an extremely large trade to have an obvious and lasting impact on the dynamics of the surface. Since we only have data for one month it might be the case that none of the trades for our stocks in October were large enough to have an impact on price levels or spreads. At least not an impact that is observable for more than a minute.
c)
If we had found a trade that clearly impacted the surface, we could have characterized a relaxation time by observing how long it took the surface to revert back to its "regular" state from before the trade. Since we can't observe a disturbance in the volatility surface, we can think of two possibilities: if there is a temporary disturbance in the surface due to a large trade, it either subsides in less than a minute, or the trade needs to be larger relative to average trade size than any of the examples we have observed for a disturbance to appear in the first place.
d)
If one can reliably identify a relaxation time for the surface of a particular stock, and the relaxation time is long enough that trades can be made before the surface reverts to its normal state, then a simple trading scheme would be to take a mean-reverting position. A trader could monitor the options on the stock, and whenever he sees a large trade that he expects will disturb the volatility surface, he can go short (long) implied volatility on options with the same or similar strike and expiration if he expects the disturbance to be an upwards (downwards) shift, then close the position after the surface has reverted to its normal state. He would always have to be aware of other events surrounding the options, such as earnings, expirations, etc.
Closest earnings date: 2013-10-28
Analysis Date Range: 2013-1-1 - 2013-10-21
# Get 20 largest trades for Apple
# (tick-level option trades before 2013-10-21, ranked by trade size)
q = '''
-- Get tick data for stock
select symbol, [timestamp], expiration, strike, optiontype,
tradesize, tradePrice, tradeConditionID, canceledTradeConditionID
into #data_trade
from XFDATA.dbo.lv_options_trades
where symbol = 'AAPL'
and timestamp < '2013-10-21'
and tradeSize > 0
-- Find largest trades
select top 20 *
from #data_trade
order by tradeSize desc
drop table #data_trade
'''
apple_trades = query_dataframe(q)
apple_trades
The largest trade occurred at 12:32 on October 9th and was for 5000 call option contracts with expiration 2013-11-16 and strike 530. However, it's not two times larger than any trade for the following three days, since the second (4500) and third (3021) largest trades occur the next day. Looking at the three trading days following those trades we can see that they don't fulfil the criteria either. Since there are no trades that match the criteria in the problem description, we will just analyze the largest trade of the month.
# Get volatility surface data
# (minute-by-minute LiveVol calcs around the large AAPL trade at 12:32 on 2013-10-09)
q = '''
select symbol, [timestamp], expiration, strike, [open], high, low, [close],
tradeVolume, bidsize, bestbid, asksize, bestask, impliedUndPrice, activeUndPrice, iv,
datediff(day,convert(date,[timestamp]),expiration) as ExpD
from XFDATA.dbo.lv_minute_options_calcs
where symbol = 'AAPL'
and root = 'AAPL'
and timestamp between '2013-10-09 12:25' and '2013-10-09 12:40'
and optionType = 'c'
and iv > 0.05
and strike between 450 and 550
and datediff(day,convert(date,[timestamp]),expiration) < 100
order by timestamp, strike, expiration
'''
# Display minute-by-minute data around the trade (K=530, T=2013-11-16)
apple_large = query_dataframe(q)
mask = (apple_large.strike == 530) & (apple_large.ExpD == 38)
apple_large.loc[mask,:]
Interestingly, it seems like the trade had no discernible impact, at least not on its own series (K=530, T=2013-11-16). There is almost no trading activity before or after the trade, the price is similar, and the spread does not change after the trade (stays at around 10 cents).
The trade price is lower than the best bid so it looks like someone is selling these contracts and since there is no observable impact we can conclude that this trade was simply not large enough to have an impact on the market. In other words there is enough liquidity in Apple to handle this transaction. However, we must keep in mind that our data is only on a minute-by-minute scale and there might have been a short term impact on the bid ask spread which was resolved in less than a minute, but we can't analyze that using the data we have access to.
We plot the volatility surface centered around the strike and expiration of our option from 12:25 to 12:40; as expected there is no observable impact from the trade. If there were an impact we should have seen it occurring at 12:33 as a shift or twist in the surface, but the surface stays in a rigid structure throughout the whole period.
# Plot minute-by-minute volatility surface (one 3D surface per timestamp)
for idx, row in apple_large.groupby('timestamp'):
    # 2D grid construction over the observed strike / maturity ranges
    spline = sp.interpolate.Rbf(row.strike,row.ExpD,row.iv,function='thin-plate')
    xi = np.linspace(min(row.strike), max(row.strike))
    yi = np.linspace(min(row.ExpD), max(row.ExpD))
    X, Y = np.meshgrid(xi, yi)
    # 3D interpolation of IV onto the grid via thin-plate RBF
    Z = spline(X,Y)
    fig = plt.figure(figsize=(20, 8))
    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,cmap=plt.cm.coolwarm, linewidth=0.5, antialiased=True)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel('Strike')
    ax.set_ylabel('Time-to-maturity')
    ax.set_zlabel('Implied volatility')
    ax.set_zlim([0,1])
    plt.title(idx)
Closest earnings date: 2013-10-17
Analysis Date Range: 2013-1-1 - 2013-10-10 and 2013-10-24 - 2013-10-31
# Get 20 largest trades for Google
# (tick-level option trades outside the earnings window, ranked by trade size)
q = '''
-- Get tick data for stock
select symbol, [timestamp], expiration, strike, optiontype,
tradesize, tradePrice, tradeConditionID, canceledTradeConditionID
into #data_trade
from XFDATA.dbo.lv_options_trades
where symbol = 'GOOG'
and (timestamp < '2013-10-10' or timestamp > '2013-10-24')
and tradeSize > 0
-- Find largest trades
select top 20 *
from #data_trade
order by tradeSize desc
drop table #data_trade
'''
google_trades = query_dataframe(q)
google_trades
The largest trade of October is shared between two trades and it looks like they are part of a spread. The trade occurred at 12:00 on October 25th and was for 1482 put option contracts expiring at the end of that day with strikes 1015 and 1025. Since the trade occurred on the expiration date of the options, we can reasonably assume that in this case someone is closing out a spread position, and it's unlikely that the trade had an impact on the market. No other smaller trades in the month fulfil the criteria we are looking for, so we will go forward and analyze the activity around this trade.
# Get volatility surface data
# (minute-by-minute LiveVol calcs around the large GOOG trade at 12:00 on 2013-10-25)
q = '''
select symbol, [timestamp], expiration, strike, [open], high, low, [close],
tradeVolume, bidsize, bestbid, asksize, bestask, impliedUndPrice, activeUndPrice, iv,
datediff(day,convert(date,[timestamp]),expiration) as ExpD
from XFDATA.dbo.lv_minute_options_calcs
where symbol = 'GOOG'
and root = 'GOOG'
and timestamp between '2013-10-25 11:50' and '2013-10-25 12:10'
and optionType = 'p'
and strike between 900 and 1100
and datediff(day,convert(date,[timestamp]),expiration) < 100
and iv > 0.05
order by timestamp, strike, expiration
'''
# Display minute-by-minute data around the trade (both strikes expiring same day)
google_large = query_dataframe(q)
mask = (google_large.strike == 1015) & (google_large.ExpD == 0)
mask2 = (google_large.strike == 1025) & (google_large.ExpD == 0)
google_large.loc[mask,:].append(google_large.loc[mask2,:])
The table above shows the price and volatility data for both options around the trade, (K=1015, T=2013-10-25) and (K=1025, T=2013-10-25). It looks like this trade is part of a larger trade that is spread out into parts from 11:58 to 12:01 for around 2000 contracts. We can't see the whole order book for the options, but since the bid and ask sizes are so low and it's so close to expiration we can assume that the option is not very liquid. Therefore, it is not surprising to see these big trades temporarily affect the option prices (bid/ask spread), since the trades are likely clearing up a big portion of the order book. After the trades we can observe that the option prices are gradually climbing, but that is probably because the stock price is dropping, and since the option is really close to expiration its price is very sensitive to all underlying price changes. At least it is hard to attribute the move to this trade with any confidence, and there is no clear disturbance followed by a relaxation. It is more informative to look at the big picture, the whole volatility surface, to determine if the trade had an effect on the dynamics of the stock options that day.
We plot the volatility surface centered around the strike and expiration of our option from 11:50 to 12:10. There does seem to be some action in the shorter-maturity 1000 to 1100 strikes, but after closer inspection these changes are simply due to missing data: calculations are missing from the database on all timestamps where the surface is flat in those regions. Example: all data is available at 11:53, but K=[1000,1100] has missing data at 11:54. Taking this into account, we can't spot any obvious impact on the dynamics of the surface from the trades. The minute-by-minute dynamics are similar to those we observed when looking at the three days leading up to the trade.
# Plot minute-by-minute volatility surface (one 3D surface per timestamp)
for idx, row in google_large.groupby('timestamp'):
    # 2D grid construction over the observed strike / maturity ranges
    spline = sp.interpolate.Rbf(row.strike,row.ExpD,row.iv,function='thin-plate')
    xi = np.linspace(min(row.strike), max(row.strike))
    yi = np.linspace(min(row.ExpD), max(row.ExpD))
    X, Y = np.meshgrid(xi, yi)
    # 3D interpolation of IV onto the grid via thin-plate RBF
    Z = spline(X,Y)
    fig = plt.figure(figsize=(15, 8))
    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,cmap=plt.cm.coolwarm, linewidth=0.5, antialiased=True)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel('Strike')
    ax.set_ylabel('Time-to-maturity')
    ax.set_zlabel('Implied volatility')
    ax.set_zlim([0,2])
    plt.title(idx)
Closest earnings date: 2013-11-5
Analysis Date Range: 2013-1-1 - 2013-10-29
# Get 20 largest trades for Tesla
# (tick-level option trades before 2013-10-29, ranked by trade size)
q = '''
-- Get tick data for stock
select symbol, [timestamp], expiration, strike, optiontype,
tradesize, tradePrice, tradeConditionID, canceledTradeConditionID
into #data_trade
from XFDATA.dbo.lv_options_trades
where symbol = 'TSLA'
and timestamp < '2013-10-29'
and tradeSize > 0
-- Find largest trades
select top 20 *
from #data_trade
order by tradeSize desc
drop table #data_trade
'''
tesla_trades = query_dataframe(q)
tesla_trades
It looks like we have the same case as for Google for the largest trade: it is shared between two trades, and it looks like someone is closing out a spread trade. It occurred on October 10th at 10:25 and was for 2237 call option contracts at strikes 180 and 185. In addition, it seems like the 6 largest trades in October are all spread trades. We don't expect these spread trades to have a noticeable impact on the market or surface dynamics, but since no other smaller trades in the month fulfil the criteria we are looking for, we will go forward and analyze the activity around this trade.
# Get volatility surface data
# (minute-by-minute LiveVol calcs around the large TSLA trade at 10:25 on 2013-10-10)
q = '''
select symbol, [timestamp], expiration, strike, [open], high, low, [close],
tradeVolume, bidsize, bestbid, asksize, bestask, impliedUndPrice, activeUndPrice, iv,
datediff(day,convert(date,[timestamp]),expiration) as ExpD
from XFDATA.dbo.lv_minute_options_calcs
where symbol = 'TSLA'
and timestamp between '2013-10-10 10:20' and '2013-10-10 10:35'
and optionType = 'c'
and strike between 150 and 250
and datediff(day,convert(date,[timestamp]),expiration) < 100
and iv > 0.05
order by timestamp, strike, expiration
'''
# Display minute-by-minute data around the trade (K=180/185, T=2013-10-11)
tesla_large = query_dataframe(q)
tesla_large.head()
mask = (tesla_large.strike == 180) & (tesla_large.ExpD == 1)
mask2 = (tesla_large.strike == 185) & (tesla_large.ExpD == 1)
tesla_large.loc[mask,:].append(tesla_large.loc[mask2,:])
The table above shows the price and volatility data for both options around the trade, (K=180, T=2013-10-11) and (K=185, T=2013-10-11). The analysis for Google pretty much applies here as well: we have a spread of options close to expiry and large trades. The trades do seem to affect the option bid/ask spreads, although less here than in the Google case. But there is no clear period of disturbance followed by a relaxation.
We plot the volatility surface centered around the strike and expiration of our option from 10:20 to 10:35. Yet again the volatility surface shows no disturbance around the trade (10:26).
# Plot minute-by-minute volatility surface (one 3D surface per timestamp)
for idx, row in tesla_large.groupby('timestamp'):
    # 2D grid construction over the observed strike / maturity ranges
    spline = sp.interpolate.Rbf(row.strike,row.ExpD,row.iv,function='thin-plate')
    xi = np.linspace(min(row.strike), max(row.strike))
    yi = np.linspace(min(row.ExpD), max(row.ExpD))
    X, Y = np.meshgrid(xi, yi)
    # 3D interpolation of IV onto the grid via thin-plate RBF
    Z = spline(X,Y)
    fig = plt.figure(figsize=(15, 8))
    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,cmap=plt.cm.coolwarm, linewidth=0.5, antialiased=True)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel('Strike')
    ax.set_ylabel('Time-to-maturity')
    ax.set_zlabel('Implied volatility')
    ax.set_zlim([0,2])
    plt.title(idx)